Imported and cleaned all kiln data available from 2018-2020.
Involved using an algorithm to remove high peaks and valleys, detection of when the “start” of a run was based on setpoint and kiln temperature increases. We now have mostly clean plots with a few exceptions.
Example of setpoint vs average kiln temperature readings from assorted lots from each kiln.
all_kilns <- bind_rows(
kilns_AB %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_C %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_D %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_E %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_F %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_G %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln),
kilns_H %>% dplyr::select(time, setpoint, avg_kiln_temp, LOTNO, auc_min, auc_max, kiln)
)
# random sample of LOTNOs
set.seed(505)
n_kilns <- sample_n(all_kilns, 16) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 2) %>% dplyr::select(LOTNO) %>% unlist()
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- bind_rows(sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 2) %>% dplyr::select(LOTNO) %>% unlist(), n_kilns)
n_kilns <- unlist(n_kilns)
sample_kilns <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns, lot_compare=T)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "A"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_a <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_a)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "B"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_b <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_b)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "C"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_c <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_c)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "D"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_d <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_d)
# random sample of LOTNOs
set.seed(76)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "E"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_e <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_e)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "F"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_f <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_f)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "G"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_g <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_g)
# random sample of LOTNOs
set.seed(15)
n_kilns <- sample_n(all_kilns %>% dplyr::filter(kiln == "H"), 16) %>% dplyr::select(LOTNO) %>% unlist()
sample_kilns_h <- all_kilns %>%
dplyr::filter(LOTNO %in% n_kilns) %>%
mutate(LOTNO = as.character(LOTNO)) %>%
mutate(LOTNO = factor(LOTNO)) %>%
mutateAucValues()
plot_range(sample_kilns_h)
plot_range(sample_kilns_h, filter = "010819H", plotly_on = TRUE)
Feature engineering: using domain knowledge to extract features from raw data.
One such feature: variation between setpoint and average kiln temperatures between in 400°C to 600°C range. Algorithm was developed and deployed on all kiln data to produce a numeric value labeled AUC or aucDiff (area under curves, area under curves difference, area between the curves, etc…)
Base plot of temperature and setpoint over time, with green area representing the calculated area between the two curves. Numeric values also printed for comparison.
plotAucValues(sample_kilns, x.nudge = 900, y.nudge = 0)
plotAucValues(sample_kilns, crop=T, x.nudge = 0, y.nudge = 200)
plotAucValues(sample_kilns, crop=T, free.x=T)
Should be noted that the extracted values differ significantly between kilns. If we want to compare metrics across kilns in the future, all values will need to be scaled with one another.
Some analyses may also require normalization due to the non-normal distributions.
AUC values differ significantly between kilns.
df_merged_auc %>%
group_by(LOTNO) %>% slice(1) %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
# ggplot(aes(x=aucDiff, y=fct_reorder(KILN,aucDiff), fill =KILN2))+
ggplot(aes(x=aucDiff, y=fct_reorder(KILN2,aucDiff)))+
geom_boxplot(outlier.alpha = 0,
outlier.shape = 21)+
geom_jitter(height = .2, alpha=.1)+
labs(title = "Setpoint vs temperature variation between kilns")+
xlab("Area between curves")+
ylab("Kiln")+
theme(legend.position = 'none')+
scale_x_continuous(labels = scales::label_number())
Non-normal, mostly skewed distributions.
df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
ggplot(aes(x=aucDiff, y = ..count../sum(..count..)))+
geom_density()+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
facet_wrap(~KILN2, scales='free')
# facet_wrap(~KILN2)
The easiest analysis to perform is a simple comparison of AUC vs Lot yield on a per kiln basis.
This is frankly not expected to be useful as we know compositions and items, among other things may behave quite differently. Lumping them alltogether is a decent start, however.
The low correlation values less than 0.3 indicate weak correlation.
# join correlation of AUC, lot yield to original DF and plot
df <- df_yields_auc %>%
group_by(LOTNO, KILN, aucDiff, temp_avg, precip, snow_fall, snow_depth) %>%
dplyr::summarise(
total_fired = sum(TOTAL_ITEM_FIRED),
total_rejected = sum(TOTAL_ITEM_REJECTED),
pct_lot_yield = (total_fired - total_rejected) / total_fired
) %>%
mutate(KILN2 = str_replace(KILN, "R", ""))
df <- df %>%
group_by(KILN2) %>%
dplyr::summarise(cor = cor(pct_lot_yield, aucDiff)) %>%
left_join(df) %>%
mutate(kiln_cor = factor(paste0(KILN2, " (", round(cor,2), ")")))
df %>%
ggplot(aes(x=aucDiff, y=pct_lot_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity(alpha=.8, size=1)+
scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
facet_wrap(~kiln_cor, scales='free')+
ylab('Lot yield')+
xlab('Area between curves')+
labs(title = 'AUC versus entire lot yields',
subtitle = 'Correlation value (in parentheses)')+
theme(legend.position = 'none')
#
#
# df %>% count(cor, KILN2) %>%
# arrange(-abs(cor)) %>%
# mutate(
# cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
# ) %>%
# set_colnames(c("Correlation", "Kiln", "Observations")) %>%
# kable(format = 'html', escape = 'F') %>%
# kable_styling('striped',full_width = F)
Different items may react differently to different AUC values. We can extract the top items (most lot numbers associated with) from each kiln and generate a plot similar to above.
Is the AUC feature related to individual item yields within each kiln?
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "A")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "B")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "C")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# # table
# df %>%
# count(cor, DESCRIPTION) %>%
# arrange(-abs(cor)) %>%
# mutate(
# cor = cell_spec(round(cor,2), 'html', color= ifelse(cor < 0, 'red', 'black'))
# ) %>%
# set_colnames(c("Correlation", "Description", "Observations")) %>%
# kable(format = 'html', escape = 'F') %>%
# kable_styling('striped',full_width = F)
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "D")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "E")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "F")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "G")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red', method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
# yields df of kiln
df <- df_yields_auc %>%
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
dplyr::filter(KILN2 == "H")
# get top items fired in kiln
df_items <- df %>%
count(DESCRIPTION) %>%
arrange(-n) %>%
slice(1:9)
# filter original df for top items
df <- df %>%
dplyr::filter(DESCRIPTION %in% df_items$DESCRIPTION)
# get cor values and join to original
df_cor <- df %>%
group_by(DESCRIPTION) %>%
dplyr::summarise(cor = round(cor(aucDiff, total_item_pct_yield),2)) %>%
left_join(df) %>%
dplyr::select(DESCRIPTION, cor) %>%
group_by(DESCRIPTION) %>% slice(1)
df <- df %>%
left_join(df_cor) %>%
mutate(descr_cor = paste0(DESCRIPTION, " (", cor, ")")) %>%
right_join(df)
# plot
df %>%
ggplot(aes(x=aucDiff, y=total_item_pct_yield))+
geom_smooth(alpha=.1, color='red',method='lm')+
geom_pointdensity()+
# scale_y_continuous(limits = c(0,1),labels = scales::percent_format())+
scale_y_continuous(labels = scales::percent_format())+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_color_viridis_c()+
ylab('Item yield')+
xlab('Area between curves')+
labs(title = 'AUC versus item yields')+
facet_wrap(~descr_cor, scales='free', labeller=label_wrap_gen())+
theme(legend.position = 'none',
strip.text.x = element_text(size = 8))
For example: does a higher AUC value seem to impact the rate of cracked webs observed per lot?
# get lot fired total
df <- df_merged_auc %>%
# lump kilns
mutate(KILN2 = str_replace(KILN, "R", "")) %>%
mutate(reject_count_single_row = reject_vol_single_row_D * vol_piece) %>%
group_by(LOTNO) %>%
dplyr::summarise(total_lot_count_fired = sum(total_item_count_fired_D)) %>%
right_join(df_merged_auc)
# get defects total
df <- df %>%
group_by(LOTNO, CAUSE) %>%
dplyr::summarise(total_defect_count_per_lot = sum(total_item_count_rejected_D)) %>%
right_join(df) %>%
group_by(LOTNO, CAUSE) %>%
slice(1) %>% ungroup() %>%
dplyr::select(LOTNO, CAUSE, total_lot_count_fired, total_defect_count_per_lot, aucDiff) %>%
mutate(pct_defect = 1 - (total_lot_count_fired - total_defect_count_per_lot) / total_lot_count_fired )
# fill missing values
pct_defect_by_lot <- df %>%
pivot_wider(id_cols = LOTNO,
names_from = CAUSE,
values_from = pct_defect,
values_fill = 0)
# join pct defect to aucDiff
pct_defect_by_lot <- pct_defect_by_lot %>%
pivot_longer(cols = BE:BIT) %>%
# join to aucDiff
left_join(
df_merged_auc %>%
mutate(KILN = str_replace(KILN, "R", "")) %>%
group_by(LOTNO) %>% slice(1) %>%
dplyr::select(LOTNO, KILN, aucDiff)
) %>%
set_colnames(c("LOTNO", "CAUSE", "defect_pct", "KILN", "aucDiff")) %>%
mutate_if(is.character, factor)
# for CW, what is the relationship between KILN and aucDiff?
def = "CW"
pct_defect_by_lot %>%
dplyr::filter(CAUSE == def) %>%
group_by(KILN) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(CAUSE == def)) %>%
dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity()+
scale_color_viridis_c()+
facet_wrap(~kiln_cor,scales='free')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.15))+
labs(
title = paste0(def, " defect rate per lot vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0(def, " defect rate"))+
theme(legend.position = 'none')
def = "BE"
pct_defect_by_lot %>%
dplyr::filter(CAUSE == def) %>%
group_by(KILN) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(CAUSE == def)) %>%
dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2, color='red',method='lm')+
geom_pointdensity()+
scale_color_viridis_c()+
facet_wrap(~kiln_cor,scales='free')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.2))+
labs(
title = paste0(def, " defect rate per lot vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0(def, " defect rate"))+
theme(legend.position = 'none')
def = "DC"
pct_defect_by_lot %>%
dplyr::filter(CAUSE == def) %>%
group_by(KILN) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(CAUSE == def)) %>%
dplyr::mutate(kiln_cor = paste0(KILN, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity()+
scale_color_viridis_c()+
facet_wrap(~kiln_cor, scales = 'free')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.1))+
scale_y_continuous(labels = scales::percent_format())+
labs(
title = paste0(def, " defect rate per lot vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0(def, " defect rate"))+
theme(legend.position = 'none')
Within a specific kiln, how do the defect types and rates change with the AUC feature?
# for each kiln, compare the defect rate vs AUC for each cause
kil = "A"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "B"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "C"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .1)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "D"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "E"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .3)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "F"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "G"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')
# for each kiln, compare the defect rate vs AUC for each cause
kil = "H"
pct_defect_by_lot %>%
dplyr::filter(KILN == kil) %>%
group_by(CAUSE) %>%
dplyr::summarise(cor = cor(defect_pct, aucDiff),
cor = ifelse(is.na(cor), 0, cor)) %>%
# arrange(-cor) %>%
right_join(pct_defect_by_lot %>%
dplyr::filter(KILN == kil)) %>%
dplyr::mutate(cause_cor = paste0(CAUSE, " (", round(cor,2), ")")) %>%
ggplot(aes(y=defect_pct, x=aucDiff))+
geom_smooth(alpha=.2,color='red',method='lm')+
geom_pointdensity(adjust = .2)+
scale_color_viridis_c()+
facet_wrap(~cause_cor, scales='free_y')+
scale_x_continuous(labels = scales::number_format(scale=1e-3, suffix='K'))+
scale_y_continuous(labels = scales::percent_format())+
# scale_y_continuous(labels = scales::percent_format(), limits = c(0,.06))+
labs(
title = paste0("Kiln ", kil, " defect rate vs AUC"),
subtitle = "Correlation value (in parentheses)"
)+
xlab("Area between setpoint, kiln temp")+
ylab(paste0("Defect rate"))+
theme(legend.position = 'none')